# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface

import scrapy
from scrapy.pipelines.files import FilesPipeline
from urllib.parse import urlparse
from os.path import basename, dirname, join

class ZhihuPipeline(FilesPipeline):
    """Files pipeline that downloads one PDF per scraped item.

    Each item is expected to carry:
      - ``item['file_url']``:  URL of the PDF to download (required; a
        missing key raises ``KeyError``, surfacing the bad item early).
      - ``item['file_name']``: human-readable title used as the saved
        file's base name.
    """

    def get_media_requests(self, item, info):
        """Yield the download request for the item's file.

        The title is stashed in the request ``meta`` so that
        :meth:`file_path` can name the stored file after it.
        """
        yield scrapy.Request(item['file_url'], meta={'title': item['file_name']})

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the relative storage path for a downloaded file.

        The path is ``<last-dir-of-url-path>/<title>.pdf``.  If no title
        was attached to the request, fall back to the file name embedded
        in the URL path instead of crashing on ``None + '.pdf'``.
        """
        url_path = urlparse(request.url).path
        title = request.meta.get('title')
        if title:
            # basename() strips any path separators a scraped title might
            # contain, so it cannot escape the storage directory.
            file_name = basename(title + '.pdf')
        else:
            file_name = basename(url_path)
        return join(basename(dirname(url_path)), file_name)
